Detailed survival analysis of the lung cancer data from the survival package.

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Snapshot the current graphical parameters (presumably for a later par(op);
# no restore is visible in this part of the file).
op <- par(no.readonly = TRUE)
# pander table formatting: keep trailing zeros and print 3 digits.
pander::panderOptions("keep.trailing.zeros", TRUE)
pander::panderOptions("digits", 3)

Data

# Load the lung cancer data. In the survival package the `lung` data frame is
# stored in the `cancer` data file, so `data(lung)` only produced the warning
# "data set 'lung' not found"; loading `cancer` makes `lung` available
# without that warning.
data(cancer, package = "survival")
# Drop the institution code column.
lung$inst <- NULL
# Recode status from the 1/2 coding (documented in survival as
# 1 = censored, 2 = dead) to 0/1.
lung$status <- lung$status - 1
# Keep only the rows without missing values.
lung <- lung[complete.cases(lung), ]

# Event counts after recoding (0 vs 1).
pander::pander(table(lung$status))
0 1
47 121
# Summary statistics of the `time` column (used below as the time-to-event).
pander::pander(summary(lung$time))
Min. 1st Qu. Median Mean 3rd Qu. Max.
5 175 268 310 416 1022

Exploring Raw Features with RRPlot

# Treat a column as continuous when it has more than 10 distinct values.
# The count is taken per column with vapply(): apply() on a data frame
# coerces it to a matrix and returns a matrix (not a list) whenever every
# column happens to have the same number of unique values, in which case
# lapply() would count individual cells instead of columns.
n_distinct <- vapply(lung, function(column) length(unique(column)), integer(1))
# `time` is part of the outcome, not a candidate feature.
convar <- setdiff(names(n_distinct)[n_distinct > 10], "time")
# Univariate screening of the continuous features against `status`.
topvar <- univariate_BinEnsemble(lung[, c("status", convar)], "status")
pander::pander(topvar)
age wt.loss
0.106 0.106
# Keep at most the first five entries of `topvar`. head() replaces
# names(topvar)[1:topv]: when nothing is selected, 1:0 expands to c(1, 0)
# and can produce an NA entry instead of an empty selection.
topv <- min(5, length(topvar))
topFive <- head(names(topvar), topv)

# One RRPlot() analysis per selected raw feature, stored under the feature
# name. The list is preallocated and filled by name, so no manual index
# counter is needed. The stray trailing comma of the original call (which
# passed an empty argument) has been removed.
RRanalysis <- vector("list", length(topFive))
names(RRanalysis) <- topFive
for (topf in topFive) {
  RRanalysis[[topf]] <- RRPlot(
    cbind(lung$status, lung[, topf]),
    atProb = c(0.90),
    timetoEvent = lung$time,
    title = topf
    # , plotRR = FALSE  # option left disabled, as in the original call
  )
}

Reporting the Metrics

# Stack one metric from every per-feature RRPlot() result into a matrix with
# one row per feature, labelled with the feature name.
stackMetric <- function(pick) {
  stacked <- do.call(
    rbind,
    lapply(topFive, function(feature) pick(RRanalysis[[feature]]))
  )
  rownames(stacked) <- topFive
  stacked
}

CstatCI <- stackMetric(function(rr) rr$c.index$cstatCI)
RRatios <- stackMetric(function(rr) rr$RR_atP)
LogRangp <- stackMetric(function(rr) rr$surdif$pvalue)
Sensitivity <- stackMetric(function(rr) rr$ROCAnalysis$sensitivity)
Specificity <- stackMetric(function(rr) rr$ROCAnalysis$specificity)
ROCAUC <- stackMetric(function(rr) rr$ROCAnalysis$aucs)

# ROC AUC per feature.
pander::pander(ROCAUC)
  est lower upper
age 0.588 0.490 0.685
wt.loss 0.556 0.458 0.655
# C statistic per feature, as collected from c.index$cstatCI.
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.557 0.557 0.492 0.614
wt.loss 0.513 0.513 0.455 0.571
# Risk ratio per feature, as collected from RR_atP (RRPlot was called with atProb = 0.90).
pander::pander(RRatios)
  est lower upper
age 0.919 0.645 1.31
wt.loss 0.785 0.462 1.33
# p-value per feature, as collected from surdif$pvalue
# (the variable name suggests a log-rank test -- confirm).
pander::pander(LogRangp)
age 0.710
wt.loss 0.358
# Sensitivity per feature, from ROCAnalysis$sensitivity.
pander::pander(Sensitivity)
  est lower upper
age 0.0909 0.0463 0.157
wt.loss 0.0496 0.0184 0.105
# Specificity per feature, from ROCAnalysis$specificity.
pander::pander(Specificity)
  est lower upper
age 0.894 0.769 0.965
wt.loss 0.894 0.769 0.965
# Side-by-side table of the first column (the point estimate / mean) of each
# per-feature metric matrix.
meanMatrix <- cbind(
  ROCAUC = ROCAUC[, 1],
  "C-Stat" = CstatCI[, 1],
  Sen = Sensitivity[, 1],
  Spe = Specificity[, 1],
  RR = RRatios[, 1]
)
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe RR
age 0.588 0.557 0.0909 0.894 0.919
wt.loss 0.556 0.513 0.0496 0.894 0.785

Modeling

# Fit the BSWiMS survival model on the full data set with 10 repeats. The
# formula lists no predictors; the candidate terms are presumably taken from
# the remaining columns of `lung` -- confirm in the FRESA.CAD documentation.
ml <- BSWiMS.model(
  Surv(time, status) ~ 1,
  data = lung,
  NumberofRepeats = 10
)

[++++++++++++++++++++++++++++]..

# Summarise the fitted model and print its coefficient table.
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
ph.ecog 4.32e-01 1.194 1.541 1.988 0.679 0.649
sex -4.59e-01 0.456 0.632 0.876 0.649 0.679
pat.karno -1.77e-03 0.997 0.998 1.000 0.506 0.720
ph.karno -2.90e-07 1.000 1.000 1.000 0.577 0.720
age 1.37e-07 1.000 1.000 1.000 0.565 0.720
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
ph.ecog 0.601 0.601 0.620 0.600 0.0449 0.405
sex 0.601 0.620 0.601 0.600 0.0285 0.478
pat.karno 0.506 0.585 0.500 0.585 0.0292 0.342
ph.karno 0.577 0.570 0.500 0.570 0.0143 0.280
age 0.565 0.549 0.500 0.549 0.0162 0.195
  z.IDI z.NRI Delta.AUC Frequency
ph.ecog 3.33 2.48 -0.02005 1.0
sex 2.76 2.85 -0.00167 1.0
pat.karno 2.44 2.24 0.08546 1.0
ph.karno 2.22 1.64 0.06998 0.5
age 1.97 1.14 0.04871 0.3

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability, assuming a baseline hazard for events within the time interval defined below (twice the mean time of the observed events).

# Time horizon: twice the mean time of the subjects with an event.
timeinterval <- 2 * mean(lung$time[which(lung$status == 1)])

# Initial baseline estimate: events seen within the horizon divided by the
# number of subjects that either had an event or were followed past it.
h0 <- with(lung, {
  eventsInWindow <- sum(status & time <= timeinterval)
  eventOrPastWindow <- sum((time > timeinterval) | (status == 1))
  eventsInWindow / eventOrPastWindow
})
pander::pander(
  t(c(h0 = h0, timeinterval = timeinterval)),
  caption = "Initial Parameters"
)
Initial Parameters
h0 timeinterval
0.85 578
# Model prediction for every subject, mapped to an event probability with
# ppoisGzero() and the initial h0.
index <- predict(ml, lung)
rdata <- cbind(lung$status, ppoisGzero(index, h0))

# Risk analysis of the uncalibrated training predictions.
rrAnalysisTrain <- RRPlot(
  rdata,
  atProb = 0.90,
  timetoEvent = lung$time,
  title = "Raw Train: Lung Cancer",
  ysurvlim = c(0, 1),
  riskTimeInterval = timeinterval
)

As we can see, neither the observed probability nor the time vs. events curve is calibrated.

Uncalibrated Performance Report

# Report of the uncalibrated training analysis: each call prints one field of
# the RRPlot() result stored in rrAnalysisTrain.
# OERatio: printed with est/lower/upper columns.
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
1.65 1.37 1.97
# OE95ci: printed with mean and 50%/2.5%/97.5% columns.
# NOTE(review): this caption duplicates the one used for OERatio above although
# a different field is shown; consider a distinct caption (e.g. "O/E Mean").
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.21 1.21 1.16 1.25
# OAcum95ci: same column layout as OE95ci.
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.2 1.2 1.19 1.21
# C index with its interval (c.index$cstatCI).
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.651 0.651 0.592 0.709
# ROC AUC with lower/upper bounds.
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.692 0.599 0.785
# Sensitivity and specificity with lower/upper bounds.
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.273 0.196 0.361
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# Probability threshold for the atProb = 0.90 setting passed to RRPlot().
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.649
# Risk ratio at that threshold.
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.31 1.11 1.54
# Log-rank comparison of the two classes (surdif).
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")

Cox Calibration

# Snapshot of the graphical parameters (same call as in the setup section).
op <- par(no.readonly = TRUE)

# Calibrate the model risk on the training data; `status` and `time` name
# the event and time-to-event columns of `lung`.
calprob <- CoxRiskCalibration(ml, lung, "status", "time")

calibrationParameters <- c(
  h0 = calprob$h0,
  Gain = calprob$hazardGain,
  DeltaTime = calprob$timeInterval
)
pander::pander(calibrationParameters, caption = "Cox Calibration Parameters")
h0 Gain DeltaTime
1.29 1.52 749

The RRPlot() of the calibrated model

# From here on h0 and timeinterval hold the calibrated values.
timeinterval <- calprob$timeInterval
h0 <- calprob$h0

# Observed status next to the calibrated event probability.
rdata <- cbind(lung$status, calprob$prob)

# Risk analysis of the calibrated training predictions.
rrAnalysisTrain <- RRPlot(
  rdata,
  atProb = 0.90,
  timetoEvent = lung$time,
  title = "Train: Lung",
  ysurvlim = c(0, 1),
  riskTimeInterval = timeinterval
)

Calibrated Train Performance

# Report of the calibrated training analysis: each call prints one field of
# the RRPlot() result stored in rrAnalysisTrain.
# OERatio: printed with est/lower/upper columns.
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
1.45 1.2 1.73
# OE95ci: printed with mean and 50%/2.5%/97.5% columns.
# NOTE(review): this caption duplicates the one used for OERatio above although
# a different field is shown; consider a distinct caption (e.g. "O/E Mean").
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.04 1.04 1 1.08
# OAcum95ci: same column layout as OE95ci.
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.01 1.01 1 1.01
# C index with its interval (c.index$cstatCI).
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.651 0.652 0.589 0.711
# ROC AUC with lower/upper bounds.
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.692 0.599 0.785
# Sensitivity and specificity with lower/upper bounds.
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.273 0.196 0.361
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# Probability threshold for the atProb = 0.90 setting passed to RRPlot().
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.796
# Risk ratio at that threshold.
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.31 1.11 1.54
# Log-rank comparison of the two classes (surdif).
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")

Cross-Validation

# Repeated random train/test cross-validation of the BSWiMS survival model:
# 200 repetitions with a training fraction of 0.95.
rcv <- randomCV(
  theData = lung,
  theOutcome = Surv(time, status) ~ 1,
  fittingFunction = BSWiMS.model,
  trainFraction = 0.95,
  repetitions = 200,
  classSamplingType = "Pro"
)

.[++++].[+-].[+++].[+].[++++].[++].[+++].[+++].[++].[+++]10 Tested: 72 Avg. Selected: 3.6 Min Tests: 1 Max Tests: 5 Mean Tests: 1.388889 . MAD: 0.4659645

.[++].[+++].[++].[++++].[+++].[+++].[+++].[+++].[+++].[++]20 Tested: 117 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 5 Mean Tests: 1.709402 . MAD: 0.4716037

.[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+++].[++]30 Tested: 140 Avg. Selected: 3.733333 Min Tests: 1 Max Tests: 6 Mean Tests: 2.142857 . MAD: 0.4723033

.[+].[+++].[++].[+++].[++].[++].[++].[+++].[+++].[+++]40 Tested: 151 Avg. Selected: 3.65 Min Tests: 1 Max Tests: 7 Mean Tests: 2.649007 . MAD: 0.4734134

.[+++].[++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[+++]50 Tested: 154 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 9 Mean Tests: 3.246753 . MAD: 0.4742514

.[+++].[+].[++].[+++].[++].[+++].[+++].[+].[++].[+++]60 Tested: 163 Avg. Selected: 3.633333 Min Tests: 1 Max Tests: 10 Mean Tests: 3.680982 . MAD: 0.4763734

.[++++].[++].[++].[+++].[+++].[++-].[+].[+++].[+++].[+++]70 Tested: 166 Avg. Selected: 3.628571 Min Tests: 1 Max Tests: 10 Mean Tests: 4.216867 . MAD: 0.4769776

.[++].[++++].[+++].[+++].[+++].[+++].[+++].[+++].[+].[+++]80 Tested: 166 Avg. Selected: 3.65 Min Tests: 1 Max Tests: 11 Mean Tests: 4.819277 . MAD: 0.4770715

.[+++].[++].[++].[+++].[+].[++].[+].[+++].[+++].[+++]90 Tested: 168 Avg. Selected: 3.611111 Min Tests: 1 Max Tests: 12 Mean Tests: 5.357143 . MAD: 0.4765639

.[+++].[++++].[++].[++++].[++++].[+++].[+++].[+++].[++].[++]100 Tested: 168 Avg. Selected: 3.65 Min Tests: 1 Max Tests: 13 Mean Tests: 5.952381 . MAD: 0.4765173

.[+++].[++].[+].[+++].[+++].[+++].[+++].[+++].[+++].[+++]110 Tested: 168 Avg. Selected: 3.654545 Min Tests: 1 Max Tests: 13 Mean Tests: 6.547619 . MAD: 0.4763035

.[++].[+++].[+++].[+++].[++-].[+++].[+++].[+++].[++].[+]120 Tested: 168 Avg. Selected: 3.641667 Min Tests: 1 Max Tests: 14 Mean Tests: 7.142857 . MAD: 0.4762139

.[++].[+++].[++].[+++].[+++].[+++].[+++].[+++].[++++].[+++]130 Tested: 168 Avg. Selected: 3.661538 Min Tests: 2 Max Tests: 15 Mean Tests: 7.738095 . MAD: 0.4762785

.[+++].[+++-].[++].[+++].[++-].[+++].[++++].[+++].[+++].[+-]140 Tested: 168 Avg. Selected: 3.664286 Min Tests: 2 Max Tests: 15 Mean Tests: 8.333333 . MAD: 0.4762601

.[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+++]150 Tested: 168 Avg. Selected: 3.68 Min Tests: 2 Max Tests: 17 Mean Tests: 8.928571 . MAD: 0.4761804

.[+++].[+++].[++].[++].[++].[+++].[+++].[+++].[+++].[++]160 Tested: 168 Avg. Selected: 3.675 Min Tests: 2 Max Tests: 18 Mean Tests: 9.52381 . MAD: 0.4756904

.[+++].[+++].[+].[+++].[++].[++++].[++].[+++-].[+++].[++]170 Tested: 168 Avg. Selected: 3.670588 Min Tests: 2 Max Tests: 19 Mean Tests: 10.11905 . MAD: 0.4755905

.[++].[++].[+++].[+++].[++].[+].[++++].[+++-].[+++].[+++]180 Tested: 168 Avg. Selected: 3.666667 Min Tests: 4 Max Tests: 21 Mean Tests: 10.71429 . MAD: 0.4757178

.[+++].[+++].[++].[+++].[++].[+++].[++].[++].[++].[+]190 Tested: 168 Avg. Selected: 3.647368 Min Tests: 4 Max Tests: 23 Mean Tests: 11.30952 . MAD: 0.475742

.[+++].[+++].[++++].[+++].[++].[++].[+++].[+++].[+++].[++]200 Tested: 168 Avg. Selected: 3.655 Min Tests: 4 Max Tests: 23 Mean Tests: 11.90476 . MAD: 0.475456

# Held-out survival predictions; rows whose 4th column is NA are dropped.
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[, 4]), ]

# boxplot(plot = FALSE) is used purely as a grouped summary keyed by row
# name; row 3 of its $stats matrix is the per-group median.
groupSummary <- function(column) {
  boxplot(unlist(stp[, column]) ~ rownames(stp), plot = FALSE)
}

# Columns 1, 2 and 4 are used as time, status and model prediction
# (inferred from the variables they are assigned to -- confirm).
bbx <- groupSummary(1)
times <- bbx$stats[3, ]
status <- groupSummary(2)$stats[3, ]
# h0 is whatever value is current at this point of the script.
prob <- ppoisGzero(groupSummary(4)$stats[3, ], h0)

names(times) <- bbx$names
rdatacv <- cbind(status, prob)
rownames(rdatacv) <- bbx$names

# Risk analysis of the cross-validated test predictions.
rrAnalysisTest <- RRPlot(
  rdatacv,
  atProb = 0.90,
  timetoEvent = times,
  title = "Test: Lung Cancer",
  ysurvlim = c(0, 1),
  riskTimeInterval = timeinterval
)

Cross-Validation Test Performance

# Report of the cross-validated test analysis: each call prints one field of
# the RRPlot() result stored in rrAnalysisTest.
# OERatio: printed with est/lower/upper columns.
pander::pander(t(rrAnalysisTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
1.45 1.2 1.73
# OE95ci: printed with mean and 50%/2.5%/97.5% columns.
# NOTE(review): this caption duplicates the one used for OERatio above although
# a different field is shown; consider a distinct caption (e.g. "O/E Mean").
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.03 1.03 0.994 1.07
# OAcum95ci: same column layout as OE95ci.
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
0.952 0.952 0.94 0.964
# C index with its interval (c.index$cstatCI).
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.601 0.6 0.53 0.665
# ROC AUC with lower/upper bounds.
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.502 0.704
# Sensitivity and specificity with lower/upper bounds.
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.19 0.124 0.271
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# Probability threshold for the atProb = 0.90 setting passed to RRPlot().
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.807
# Risk ratio at that threshold.
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.23 1.01 1.48
# Log-rank comparison of the two classes (surdif).
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")

Calibrating the test results

# Status, predicted probability and time per test subject, passed as one
# matrix to the Poisson-risk calibration.
rdatacv <- cbind(status, prob, times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

testCalibrationParameters <- c(
  h0 = calprob$h0,
  Gain = calprob$hazardGain,
  DeltaTime = calprob$timeInterval
)
pander::pander(
  testCalibrationParameters,
  caption = "Cox Calibration Parameters"
)
h0 Gain DeltaTime
0.85 1 755
# Use the time interval returned by the test calibration from here on.
timeinterval <- calprob$timeInterval

# Observed status next to the calibrated test probability.
rdata <- cbind(status, calprob$prob)

# Risk analysis of the calibrated cross-validated test predictions.
rrAnalysisTest <- RRPlot(
  rdata,
  atProb = 0.90,
  timetoEvent = times,
  title = "Calibrated Test: Lung",
  ysurvlim = c(0, 1),
  riskTimeInterval = timeinterval
)

Calibrated Test Performance

# Report of the calibrated test analysis: each call prints one field of the
# RRPlot() result stored in rrAnalysisTest.
# OERatio: printed with est/lower/upper columns.
pander::pander(t(rrAnalysisTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
1.45 1.21 1.74
# OE95ci: printed with mean and 50%/2.5%/97.5% columns.
# NOTE(review): this caption duplicates the one used for OERatio above although
# a different field is shown; consider a distinct caption (e.g. "O/E Mean").
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.04 1.04 0.999 1.08
# OAcum95ci: same column layout as OE95ci.
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
0.952 0.952 0.941 0.963
# C index with its interval (c.index$cstatCI).
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.601 0.6 0.534 0.666
# ROC AUC with lower/upper bounds.
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.502 0.704
# Sensitivity and specificity with lower/upper bounds.
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.19 0.124 0.271
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
# Probability threshold for the atProb = 0.90 setting passed to RRPlot().
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.807
# Risk ratio at that threshold.
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.23 1.01 1.48
# Log-rank comparison of the two classes (surdif).
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")